🎨 Advanced ggplot2 - Part 1

Geometries və Advanced Aesthetics

1 Dataset Hazırlığı və Geometries

1.1 Demo Dataset

# Fix the RNG seed and cohort size so the synthetic data is reproducible.
set.seed(2024)
n <- 200

# Build the demo cohort. Columns are listed (and therefore drawn from the
# random stream) in a fixed order: demographics first, then scores, GPA,
# study hours and the two 1-10 self-report scales.
academic_data <- data.frame(
  student_id = seq_len(n),
  age = sample(x = 18:25, size = n, replace = TRUE),
  gender = sample(x = c("Male", "Female"), size = n, replace = TRUE),
  faculty = sample(
    x = c("Engineering", "Medicine", "Economics", "IT"),
    size = n,
    replace = TRUE
  ),
  math_score = round(rnorm(n, mean = 75, sd = 15), digits = 1),
  physics_score = round(rnorm(n, mean = 72, sd = 16), digits = 1),
  gpa = round(runif(n, min = 2.0, max = 4.0), digits = 2),
  # Weekly study hours are floored at 5.
  study_hours_week = round(pmax(rnorm(n, mean = 30, sd = 10), 5), digits = 1),
  life_satisfaction = round(runif(n, min = 1, max = 10), digits = 1),
  stress_level = round(runif(n, min = 1, max = 10), digits = 1),
  stringsAsFactors = FALSE
)

# Derived columns: combined score and a four-level GPA band.
academic_data[["total_score"]] <- with(academic_data, math_score + physics_score)
academic_data[["performance_level"]] <- cut(
  academic_data[["gpa"]],
  breaks = c(0, 2.5, 3.0, 3.5, 4.0),
  labels = c("Below Average", "Average", "Good", "Excellent")
)

cat("Dataset yaradıldı:", nrow(academic_data), "tələbə\n")
#> Dataset yaradıldı: 200 tələbə
head(academic_data, 3)
#>   student_id age gender     faculty math_score physics_score  gpa
#> 1          1  19   Male    Medicine       85.7          84.4 2.34
#> 2          2  22   Male Engineering       65.5          60.3 2.56
#> 3          3  22   Male          IT       70.2          84.8 3.76
#>   study_hours_week life_satisfaction stress_level total_score performance_level
#> 1             28.2               5.3          7.3       170.1     Below Average
#> 2             28.0               8.5          4.9       125.8           Average
#> 3             41.0               3.8          6.5       155.0         Excellent

1.2 Advanced Scatter Plots

# Basic scatter plot: GPA against math score, with the least-squares line
# from a simple linear fit drawn on top.
with(
  academic_data,
  plot(
    math_score, gpa,
    pch = 16, cex = 0.8, col = "steelblue",
    xlab = "Math Score", ylab = "GPA",
    main = "Math Score vs GPA"
  )
)
abline(reg = lm(gpa ~ math_score, data = academic_data), lwd = 2, col = "red")

# Print the equivalent ggplot2 code as a hint.
cat(
  "ggplot2 kod:\n",
  "ggplot(data, aes(x = math_score, y = gpa)) +\n",
  "  geom_point() + geom_smooth(method = 'lm')\n",
  sep = ""
)
#> ggplot2 kod:
#> ggplot(data, aes(x = math_score, y = gpa)) +
#>   geom_point() + geom_smooth(method = 'lm')
# Multi-dimensional plot: colour encodes faculty, point size encodes weekly
# study hours.
#
# A single factor drives both the colour lookup and the legend. The factor's
# levels are sorted alphabetically, so the legend labels must come from
# levels() as well; taking them from unique() (order of first appearance)
# would pair faculty names with the wrong colours.
faculty_factor <- factor(academic_data$faculty)
faculty_colors <- rainbow(nlevels(faculty_factor))
faculty_numeric <- as.numeric(faculty_factor)

plot(academic_data$math_score, academic_data$gpa,
     main = "Multi-dimensional Visualization",
     xlab = "Math Score", ylab = "GPA",
     col = faculty_colors[faculty_numeric],
     pch = 16, cex = academic_data$study_hours_week / 25)

# Legend entry k shows level k next to colour k, matching the points above.
legend("bottomright",
       legend = levels(faculty_factor),
       col = faculty_colors, pch = 16, cex = 0.7)

cat("\nggplot2 multi-dimensional:\n")
#> 
#> ggplot2 multi-dimensional:
cat("ggplot(data, aes(x = math_score, y = gpa)) +\n")
#> ggplot(data, aes(x = math_score, y = gpa)) +
cat("  geom_point(aes(color = faculty, size = study_hours_week))\n")
#>   geom_point(aes(color = faculty, size = study_hours_week))

1.3 Bar Charts və Line Plots

# Faculty distribution: tabulate students per faculty and draw one bar per
# faculty, each in its own rainbow colour, with axis labels perpendicular to
# the axis (las = 2).
faculty_counts <- table(academic_data[["faculty"]])
barplot(
  height = faculty_counts,
  las = 2,
  col = rainbow(length(faculty_counts)),
  main = "Students by Faculty"
)

# Print the equivalent ggplot2 code as a hint.
cat(
  "ggplot2 bar chart:\n",
  "ggplot(data, aes(x = faculty)) + geom_bar()\n",
  sep = ""
)
#> ggplot2 bar chart:
#> ggplot(data, aes(x = faculty)) + geom_bar()
# Performance levels: share of students in each GPA band. Slices follow the
# factor's level order, so the four colours run from the lowest band (red)
# to the highest (dark green).
performance_counts <- table(academic_data[["performance_level"]])
pie(
  x = performance_counts,
  col = c("red", "orange", "lightgreen", "darkgreen"),
  main = "Performance Distribution"
)

# Time series simulation: pick five random students and simulate six
# semesters of GPA for each one as a random walk (sd 0.1 per semester)
# starting from that student's recorded GPA.
sample_students <- sample(academic_data$student_id, 5)

# Empty canvas; the trajectories are added one by one below.
plot(1, type = "n", xlim = c(1, 6), ylim = c(2, 4),
     main = "GPA Progression Simulation",
     xlab = "Semester", ylab = "GPA")

colors <- rainbow(length(sample_students))
for (i in seq_along(sample_students)) {
  # Look the sampled student up by id. Indexing gpa with the loop counter
  # would always use rows 1..5 and ignore the random sample drawn above.
  student_row <- match(sample_students[i], academic_data$student_id)
  gpa_trend <- academic_data$gpa[student_row] + cumsum(rnorm(6, 0, 0.1))
  lines(1:6, gpa_trend, col = colors[i], lwd = 2, type = "b")
}

cat("\nggplot2 line plot:\n")
#> 
#> ggplot2 line plot:
cat("ggplot(data, aes(x = semester, y = gpa, color = student_id)) +\n")
#> ggplot(data, aes(x = semester, y = gpa, color = student_id)) +
cat("  geom_line() + geom_point()\n")
#>   geom_line() + geom_point()

1.4 Advanced Aesthetics

# Color gradients: total score vs stress, with life satisfaction binned into
# ten equal-width intervals and each bin drawn in one heat.colors() shade.
satisfaction_palette <- heat.colors(10)
satisfaction_bin <- cut(academic_data$life_satisfaction,
                        breaks = 10, labels = FALSE)

plot(academic_data$total_score, academic_data$stress_level,
     main = "Score vs Stress (Color: Life Satisfaction)",
     xlab = "Total Score", ylab = "Stress Level",
     col = satisfaction_palette[satisfaction_bin],
     pch = 16, cex = 1.2)

# Add legend. The swatches are picked from the same ten-colour palette used
# for the points (bins 1, 3, 5, 8 and 10); a separate heat.colors(5) call
# would produce shades that never appear in the plot.
legend_colors <- satisfaction_palette[c(1, 3, 5, 8, 10)]
legend("topright", legend = c("Low", "", "Med", "", "High"),
       col = legend_colors, pch = 16, title = "Satisfaction")

cat("ggplot2 color gradient:\n")
#> ggplot2 color gradient:
cat("ggplot(data, aes(x = total_score, y = stress_level)) +\n")
#> ggplot(data, aes(x = total_score, y = stress_level)) +
cat("  geom_point(aes(color = life_satisfaction)) +\n")
#>   geom_point(aes(color = life_satisfaction)) +
cat("  scale_color_gradient2(low = 'red', high = 'green')\n")
#>   scale_color_gradient2(low = 'red', high = 'green')
# Shape mapping: a named lookup from gender to plotting symbol
# (16 = filled circle, 17 = filled triangle), indexed by the gender column.
gender_shapes <- c(Male = 16, Female = 17)
with(
  academic_data,
  plot(
    math_score, physics_score,
    pch = gender_shapes[gender],
    col = "darkblue", cex = 1.2,
    xlab = "Math Score", ylab = "Physics Score",
    main = "Math vs Physics (Shape: Gender)"
  )
)

# The legend is built from the same lookup, so names and symbols stay paired.
legend(
  "bottomright",
  title = "Gender",
  legend = names(gender_shapes),
  pch = gender_shapes
)

# Print the equivalent ggplot2 code as a hint.
cat(
  "\nggplot2 shape mapping:\n",
  "ggplot(data, aes(x = math_score, y = physics_score)) +\n",
  "  geom_point(aes(shape = gender))\n",
  sep = ""
)
#> 
#> ggplot2 shape mapping:
#> ggplot(data, aes(x = math_score, y = physics_score)) +
#>   geom_point(aes(shape = gender))
# Transparency for overplotting: the same age-vs-GPA scatter drawn twice side
# by side, once with opaque red points and once with 50% alpha, so stacked
# points become visible as darker spots.
#
# par() returns the previous settings; keep them so the layout that was
# active before this chunk is restored afterwards instead of being forced
# to a hard-coded 1 x 1 grid.
old_par <- par(mfrow = c(1, 2))

plot(academic_data$age, academic_data$gpa,
     main = "Without Transparency",
     xlab = "Age", ylab = "GPA",
     pch = 16, col = "red")

plot(academic_data$age, academic_data$gpa,
     main = "With Transparency",
     xlab = "Age", ylab = "GPA",
     pch = 16, col = rgb(1, 0, 0, alpha = 0.5))

# Restore the caller's graphics layout.
par(old_par)

cat("ggplot2 transparency:\n")
#> ggplot2 transparency:
cat("ggplot(data, aes(x = age, y = gpa)) +\n")
#> ggplot(data, aes(x = age, y = gpa)) +
cat("  geom_point(alpha = 0.6)\n")
#>   geom_point(alpha = 0.6)

2 Statistical Layers

2.1 Regression və Smoothing

# Study hours vs GPA: semi-transparent scatter, the fitted least-squares line
# and the model's R-squared printed inside the plot area.
with(
  academic_data,
  plot(
    study_hours_week, gpa,
    pch = 16, col = rgb(0.3, 0.6, 0.8, 0.7),
    xlab = "Study Hours per Week", ylab = "GPA",
    main = "Study Hours vs GPA with Regression"
  )
)

lm_model <- lm(gpa ~ study_hours_week, data = academic_data)
abline(reg = lm_model, lwd = 3, col = "red")

# Annotate with R-squared at a fixed position (x = 40 hours, y = GPA 3.5).
r_sq <- summary(lm_model)[["r.squared"]]
text(
  x = 40, y = 3.5,
  labels = paste("R² =", round(r_sq, 3)),
  cex = 1.2, font = 2, col = "red"
)

# Print the equivalent ggplot2 code as a hint.
cat(
  "ggplot2 regression:\n",
  "ggplot(data, aes(x = study_hours_week, y = gpa)) +\n",
  "  geom_point() +\n",
  "  geom_smooth(method = 'lm', se = TRUE)\n",
  sep = ""
)
#> ggplot2 regression:
#> ggplot(data, aes(x = study_hours_week, y = gpa)) +
#>   geom_point() +
#>   geom_smooth(method = 'lm', se = TRUE)
# GPA by faculty as box plots, one translucent rainbow colour per box.
boxplot(
  gpa ~ faculty,
  data = academic_data,
  col = rainbow(4, alpha = 0.7),
  main = "GPA by Faculty"
)

# Overlay the group means as red diamonds. aggregate() returns one row per
# faculty, and box k sits at x = k, so all means are drawn in one call.
faculty_means <- aggregate(gpa ~ faculty, data = academic_data, FUN = mean)
points(
  x = seq_len(nrow(faculty_means)),
  y = faculty_means$gpa,
  pch = 18, cex = 2, col = "red"
)

# Print the equivalent ggplot2 code as a hint.
cat(
  "\nggplot2 box plots:\n",
  "ggplot(data, aes(x = faculty, y = gpa)) +\n",
  "  geom_boxplot() +\n",
  "  stat_summary(fun = mean, geom = 'point', shape = 18, size = 4)\n",
  sep = ""
)
#> 
#> ggplot2 box plots:
#> ggplot(data, aes(x = faculty, y = gpa)) +
#>   geom_boxplot() +
#>   stat_summary(fun = mean, geom = 'point', shape = 18, size = 4)

2.2 Advanced Geom Combinations

# Violin plot simulation: for each performance level, mirror a kernel density
# estimate of total_score around the level's x position.
#
# Use the factor's levels, not unique(): levels() gives the defined order
# (Below Average -> Excellent), which keeps the x axis ordered and makes the
# positional colours below line up with the intended level. unique() would
# return order of first appearance and pair colours with the wrong levels.
performance_levels <- levels(academic_data$performance_level)
performance_colors <- c("red", "orange", "lightgreen", "darkgreen")

# Empty canvas sized to hold one violin per level; the x axis is drawn
# manually at the end.
plot(1, type = "n",
     xlim = c(0.5, length(performance_levels) + 0.5),
     ylim = range(academic_data$total_score, na.rm = TRUE),
     main = "Score Distribution by Performance",
     xlab = "Performance Level", ylab = "Total Score",
     xaxt = "n")

for (i in seq_along(performance_levels)) {
  # %in% never yields NA, so rows with a missing level are simply excluded.
  in_level <- academic_data$performance_level %in% performance_levels[i]
  perf_data <- academic_data$total_score[in_level]
  # density() and median() cannot handle NA scores; drop them up front.
  perf_data <- perf_data[!is.na(perf_data)]

  # Skip levels with too few observations for a meaningful density.
  if (length(perf_data) > 3) {
    density_est <- density(perf_data)
    # Rescale so the widest point of every violin has half-width 0.3.
    density_scaled <- density_est$y / max(density_est$y) * 0.3

    # Left outline going up, right outline coming back down.
    polygon(c(i - density_scaled, i + rev(density_scaled)),
            c(density_est$x, rev(density_est$x)),
            col = adjustcolor(performance_colors[i], alpha.f = 0.4),
            border = performance_colors[i])

    # Add median line
    median_val <- median(perf_data)
    segments(i - 0.1, median_val, i + 0.1, median_val, lwd = 3)
  }
}

axis(1, at = seq_along(performance_levels), labels = performance_levels, las = 2)

cat("ggplot2 violin plots:\n")
#> ggplot2 violin plots:
cat("ggplot(data, aes(x = performance_level, y = total_score)) +\n")
#> ggplot(data, aes(x = performance_level, y = total_score)) +
cat("  geom_violin() +\n")
#>   geom_violin() +
cat("  geom_boxplot(width = 0.1)\n")
#>   geom_boxplot(width = 0.1)
# Correlation heatmap of the main numeric variables (pairwise Pearson
# correlations computed on complete rows only).
numeric_vars <- c("math_score", "physics_score", "gpa", "study_hours_week", "life_satisfaction")
cor_matrix <- cor(academic_data[, numeric_vars], use = "complete.obs")

# zlim is forwarded to image() and pins the colour scale to the full
# correlation range [-1, 1]. Without it the scale spans only the observed
# values, so the white midpoint of the diverging palette would not sit at
# r = 0 and near-zero correlations would be shown as strongly negative.
heatmap(cor_matrix,
        main = "Correlation Matrix",
        col = colorRampPalette(c("red", "white", "blue"))(50),
        zlim = c(-1, 1),
        symm = TRUE)

cat("\nggplot2 heatmap:\n")
#> 
#> ggplot2 heatmap:
cat("library(reshape2)\n")
#> library(reshape2)
cat("cor_melted <- melt(cor_matrix)\n")
#> cor_melted <- melt(cor_matrix)
cat("ggplot(cor_melted, aes(x = Var1, y = Var2, fill = value)) +\n")
#> ggplot(cor_melted, aes(x = Var1, y = Var2, fill = value)) +
cat("  geom_tile() +\n")
#>   geom_tile() +
cat("  scale_fill_gradient2(low = 'red', high = 'blue')\n")
#>   scale_fill_gradient2(low = 'red', high = 'blue')

2.3 Annotations və Text

# Advanced annotations: total score vs life satisfaction with a fitted line,
# a highlighted top scorer and the correlation coefficient written into the
# plot.
with(
  academic_data,
  plot(
    total_score, life_satisfaction,
    pch = 16, cex = 1.1, col = rgb(0.3, 0.6, 0.8, 0.6),
    xlab = "Total Score", ylab = "Life Satisfaction",
    main = "Academic Performance vs Life Satisfaction"
  )
)

# Least-squares line of satisfaction on total score.
lm_sat <- lm(life_satisfaction ~ total_score, data = academic_data)
abline(reg = lm_sat, lwd = 2, col = "red")

# Highlight the student with the highest total score: a green triangle on the
# point and a label half a unit above it.
top_performer <- academic_data[which.max(academic_data$total_score), ]
with(top_performer, {
  points(total_score, life_satisfaction, pch = 17, cex = 2, col = "green")
  text(
    total_score, life_satisfaction + 0.5,
    labels = "Top Performer", font = 2, col = "green"
  )
})

# Pearson correlation, printed at a fixed position (x = 100, y = 9).
correlation <- with(academic_data, cor(total_score, life_satisfaction))
text(
  x = 100, y = 9,
  labels = paste("r =", round(correlation, 3)),
  cex = 1.2, font = 2, col = "red"
)

# Print the equivalent ggplot2 code as a hint.
cat(
  "ggplot2 annotations:\n",
  "ggplot(data, aes(x = total_score, y = life_satisfaction)) +\n",
  "  geom_point() +\n",
  "  geom_smooth(method = 'lm') +\n",
  "  annotate('text', x = 100, y = 9, label = 'r = 0.45') +\n",
  "  annotate('point', x = max_score, y = max_sat, color = 'green')\n",
  sep = ""
)
#> ggplot2 annotations:
#> ggplot(data, aes(x = total_score, y = life_satisfaction)) +
#>   geom_point() +
#>   geom_smooth(method = 'lm') +
#>   annotate('text', x = 100, y = 9, label = 'r = 0.45') +
#>   annotate('point', x = max_score, y = max_sat, color = 'green')
# Directional arrows: study hours vs stress, with an arrow pointing at one
# "efficient" student if any exists.
with(
  academic_data,
  plot(
    study_hours_week, stress_level,
    pch = 16, col = rgb(0.4, 0.6, 0.8, 0.7),
    xlab = "Study Hours per Week", ylab = "Stress Level",
    main = "Study Hours vs Stress (with Annotations)"
  )
)

# "Efficient" means more than 40 study hours per week with stress below 3.
efficient <- with(
  academic_data,
  academic_data[study_hours_week > 40 & stress_level < 3, ]
)

# Annotate only the first match; nothing is drawn when there is none.
if (nrow(efficient) > 0) {
  student <- efficient[1, ]
  with(student, {
    # Arrow starts up and to the left of the point and ends on it.
    arrows(
      x0 = study_hours_week - 5, y0 = stress_level + 1,
      x1 = study_hours_week, y1 = stress_level,
      length = 0.1, lwd = 2, col = "green"
    )
    # Label sits just above the arrow's tail.
    text(
      study_hours_week - 5, stress_level + 1.5,
      labels = "Efficient", font = 2, col = "green"
    )
  })
}

# Print the equivalent ggplot2 code as a hint.
cat(
  "\nggplot2 arrows:\n",
  "ggplot(data, aes(x = study_hours_week, y = stress_level)) +\n",
  "  geom_point() +\n",
  "  annotate('segment', x = 35, y = 4, xend = 42, yend = 2.5,\n",
  "           arrow = arrow(), color = 'green')\n",
  sep = ""
)
#> 
#> ggplot2 arrows:
#> ggplot(data, aes(x = study_hours_week, y = stress_level)) +
#>   geom_point() +
#>   annotate('segment', x = 35, y = 4, xend = 42, yend = 2.5,
#>            arrow = arrow(), color = 'green')

📚 Part 1 Xülasəsi

Bu hissədə öyrəndiklərimiz:

  • Advanced scatter plots və multi-dimensional mapping
  • Bar charts və line plots
  • Aesthetic mappings (color, shape, size, alpha)
  • Statistical layers (regression, smoothing)
  • Geom combinations və annotations

Növbəti hissədə: Themes, customization və publication-ready graphics


🎯 Part 1 Tamamlandı!

Part 2-də themes və professional visualization öyrənəcəyik.